This notebook predicts the BTC price with an LSTM recurrent neural network, using a configurable lookback window and a (possibly multi-step) forecast horizon. The parameters below control the experiment:
# --- Hyperparameters / configuration (tune the experiment here) ---
# number of total past observations from the original dataset to be considered
n_past_total = 1200 #1500 #2000
# number of past observations (lookback window) used for LSTM training and prediction
n_past = 120 #30 #50
# number of future datapoints to predict (if higher than 1, the model switches to Multi-Step)
n_future = 30 #10
# activation function used for the RNN (softsign, relu, sigmoid)
activation = 'softsign'
# dropout rate for the hidden layers
dropout = 0.2
# number of hidden layers
n_layers = 6
# number of neurons in each hidden layer
n_neurons = 15
# features considered for training (only 'Close' -> Univariate; more columns -> Multivariate)
features = ['Close', 'Volume']
# number of input features (if higher than 1, the model is Multivariate)
n_features = len(features)
# patience for the early stopping (number of epochs without val_loss improvement)
patience = 25
# optimizer (adam, RMSprop)
optimizer='adam'
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
pd.set_option('display.float_format', lambda x: '%.3f' % x) #avoid scientific notation
np.set_printoptions(suppress=True)
import datetime
import math
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score
from keras.callbacks import EarlyStopping
#import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import Image
# reading the raw Yahoo Finance BTC-USD daily OHLCV csv
dataset = pd.read_csv('yahoo_BTC-USD.csv')
# last expression -> rich (truncated) HTML display of the frame
dataset
| Date | Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|---|
| 0 | 2014-09-17 | 465.864 | 468.174 | 452.422 | 457.334 | 457.334 | 21056800.000 |
| 1 | 2014-09-18 | 456.860 | 456.860 | 413.104 | 424.440 | 424.440 | 34483200.000 |
| 2 | 2014-09-19 | 424.103 | 427.835 | 384.532 | 394.796 | 394.796 | 37919700.000 |
| 3 | 2014-09-20 | 394.673 | 423.296 | 389.883 | 408.904 | 408.904 | 36863600.000 |
| 4 | 2014-09-21 | 408.085 | 412.426 | 393.181 | 398.821 | 398.821 | 26580100.000 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2449 | 2021-06-01 | 37293.793 | 37896.734 | 35787.086 | 36684.926 | 36684.926 | 34639423297.000 |
| 2450 | 2021-06-02 | 36699.922 | 38231.340 | 35966.309 | 37575.180 | 37575.180 | 33070867190.000 |
| 2451 | 2021-06-03 | 37599.410 | 39478.953 | 37243.973 | 39208.766 | 39208.766 | 35460750427.000 |
| 2452 | 2021-06-04 | 39242.484 | 39242.484 | 35717.723 | 36894.406 | 36894.406 | 41831090187.000 |
| 2453 | 2021-06-05 | 36780.348 | 37908.945 | 35548.820 | 35746.102 | 35746.102 | 34500784128.000 |
2454 rows × 7 columns
# checking for nulls and dtypes (the numeric columns have fewer non-null rows than Date)
dataset.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2454 entries, 0 to 2453 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 2454 non-null object 1 Open 2450 non-null float64 2 High 2450 non-null float64 3 Low 2450 non-null float64 4 Close 2450 non-null float64 5 Adj Close 2450 non-null float64 6 Volume 2450 non-null float64 dtypes: float64(6), object(1) memory usage: 134.3+ KB
# checking rows where Close differs from Adj Close; since NaN != NaN evaluates
# True element-wise, the all-NaN rows also surface here
dataset[dataset['Close']!=dataset['Adj Close']]
| Date | Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|---|
| 2039 | 2020-04-17 | nan | nan | nan | nan | nan | nan |
| 2214 | 2020-10-09 | nan | nan | nan | nan | nan | nan |
| 2217 | 2020-10-12 | nan | nan | nan | nan | nan | nan |
| 2218 | 2020-10-13 | nan | nan | nan | nan | nan | nan |
# summary statistics of the numeric columns (count confirms 4 missing rows)
dataset.describe()
| Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|
| count | 2450.000 | 2450.000 | 2450.000 | 2450.000 | 2450.000 | 2450.000 |
| mean | 7626.843 | 7844.916 | 7387.271 | 7640.137 | 7640.137 | 12616231026.884 |
| std | 11665.490 | 12032.959 | 11232.481 | 11676.218 | 11676.218 | 19727883639.959 |
| min | 176.897 | 211.731 | 171.510 | 178.103 | 178.103 | 5914570.000 |
| 25% | 460.801 | 466.735 | 455.070 | 460.719 | 460.719 | 68730922.000 |
| 50% | 4324.400 | 4403.835 | 4138.012 | 4330.050 | 4330.050 | 4010640000.000 |
| 75% | 9193.489 | 9355.582 | 9030.895 | 9203.736 | 9203.736 | 18684308124.500 |
| max | 63523.754 | 64863.098 | 62208.965 | 63503.457 | 63503.457 | 350967941479.000 |
# keep only the selected features, index by (parsed) date, and forward-fill the
# 4 missing rows with the previous day's values
df = dataset.set_index('Date')[features]#.tail(n_past_total)
df = df.set_index(pd.to_datetime(df.index))
# DataFrame.fillna(method='ffill') is deprecated (removed in pandas 3.0) and
# inplace=True makes the cell non-idempotent; .ffill() + assignment is the
# supported, re-run-safe spelling
df = df.ffill()
# correlation of the candidate input features (motivates the Multivariate setup)
dataset[['Close','Volume']].corr()
| Close | Volume | |
|---|---|---|
| Close | 1.000 | 0.798 |
| Volume | 0.798 | 1.000 |
# Interactive plot of Closing Price and Volume on a shared x-axis,
# with Volume on a secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
price_col, volume_col = features[0], features[1]
# traces: price on the primary axis, volume on the secondary axis
fig.add_trace(go.Scatter(x=df[price_col].index, y=df[price_col].values, name=price_col), secondary_y=False)
fig.add_trace(go.Scatter(x=df[volume_col].index, y=df[volume_col].values, name=volume_col), secondary_y=True)
# titles for the figure and both axes
fig.update_layout(title_text="BTC: {}, {}".format(price_col, volume_col))
fig.update_xaxes(title_text='<b>'+df.index.name+'</b>')
fig.update_yaxes(title_text='<b>'+price_col+'</b>', secondary_y=False)
fig.update_yaxes(title_text='<b>'+volume_col+'</b>', secondary_y=True)
# range-selector buttons plus a slider for interactive zooming
range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]
fig.update_xaxes(rangeslider_visible=True, rangeselector=dict(buttons=range_buttons))
fig.show()
We can observe a spike in volume around the end of February 2021. Let's take a closer look.
# inspecting the February 2021 datapoints, with an inline bar chart on the Volume column
df[(df.index>'2021-02-01') & (df.index<'2021-03-01')].style.bar(subset=['Volume'], color='#d65f5f')
| Close | Volume | |
|---|---|---|
| Date | ||
| 2021-02-02 00:00:00 | 35510.289063 | 63088585433.000000 |
| 2021-02-03 00:00:00 | 37472.089844 | 61166818159.000000 |
| 2021-02-04 00:00:00 | 36926.066406 | 68838074392.000000 |
| 2021-02-05 00:00:00 | 38144.308594 | 58598066402.000000 |
| 2021-02-06 00:00:00 | 39266.011719 | 71326033653.000000 |
| 2021-02-07 00:00:00 | 38903.441406 | 65500641143.000000 |
| 2021-02-08 00:00:00 | 46196.464844 | 101467222687.000000 |
| 2021-02-09 00:00:00 | 46481.105469 | 91809846886.000000 |
| 2021-02-10 00:00:00 | 44918.183594 | 87301089896.000000 |
| 2021-02-11 00:00:00 | 47909.332031 | 81388911810.000000 |
| 2021-02-12 00:00:00 | 47504.851563 | 76555041196.000000 |
| 2021-02-13 00:00:00 | 47105.515625 | 70250456155.000000 |
| 2021-02-14 00:00:00 | 48717.289063 | 71248675228.000000 |
| 2021-02-15 00:00:00 | 47945.058594 | 77069903166.000000 |
| 2021-02-16 00:00:00 | 49199.871094 | 77049582886.000000 |
| 2021-02-17 00:00:00 | 52149.007813 | 80820545404.000000 |
| 2021-02-18 00:00:00 | 51679.796875 | 52054723579.000000 |
| 2021-02-19 00:00:00 | 55888.132813 | 63495496918.000000 |
| 2021-02-20 00:00:00 | 56099.519531 | 68145460026.000000 |
| 2021-02-21 00:00:00 | 57539.945313 | 51897585191.000000 |
| 2021-02-22 00:00:00 | 54207.320313 | 92052420332.000000 |
| 2021-02-23 00:00:00 | 48824.425781 | 106102492824.000000 |
| 2021-02-24 00:00:00 | 49705.332031 | 63695521388.000000 |
| 2021-02-25 00:00:00 | 47093.851563 | 54506565949.000000 |
| 2021-02-26 00:00:00 | 46339.761719 | 350967941479.000000 |
| 2021-02-27 00:00:00 | 46188.453125 | 45910946382.000000 |
| 2021-02-28 00:00:00 | 45137.769531 | 53443887451.000000 |
After cross-checking with a different source, the Volume for 2021-02-26 appears to be correct (USD 350,967,941,479) — that day saw extraordinary trading activity. https://coinmarketcap.com/historical/20210226/
# saved screenshot from coinmarketcap confirming the 2021-02-26 volume figure
Image('img/coinmarketcap-20210226.png')
# dropping the earliest observations because the first years of the crypto
# market may not be representative of current BTC behaviour
df = df.tail(n_past_total)
# train/test split: all but the last n_future days for training,
# the final n_future days held out as the test horizon
df_train = df.iloc[:-n_future]
df_test = df.iloc[-n_future:]
training_set = df_train.values
test_set = df_test.values
print('training_set.shape:\t', training_set.shape)
print('test_set.shape:\t\t', test_set.shape)
training_set.shape: (1170, 2) test_set.shape: (30, 2)
# scale both features to [0, 1]; the scaler is fit on the TRAINING set only,
# so no test-set information leaks into the transform
sc = MinMaxScaler(feature_range=(0,1))
training_set_scaled = sc.fit_transform(training_set)
print('training_set_scaled.shape: ', training_set_scaled.shape)
training_set_scaled
training_set_scaled.shape: (1170, 2)
array([[0.11230479, 0.01470046],
[0.11721794, 0.01383683],
[0.10912011, 0.0114763 ],
...,
[0.83125144, 0.18859968],
[0.89912423, 0.19054371],
[0.88207514, 0.19135386]])
# creating the supervised-learning windows: each sample X is n_past timesteps
# of ALL features; each target y is the next n_future scaled Close values
# (column 0). This supersedes the old "60 timesteps and 1 output" setup.
X_train = []
y_train = []
for i in range(n_past, len(training_set_scaled) - n_future + 1):
    X_train.append(training_set_scaled[i-n_past:i, :])
    y_train.append(training_set_scaled[i:i+n_future, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train.shape, y_train.shape
((1021, 120, 2), (1021, 30))
# reshaping to (samples, timesteps, features) as expected by Keras LSTMs.
# NOTE: X_train already has exactly this shape (see previous cell), so this
# reshape is a no-op kept as an explicit shape guard/documentation.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_train.shape
(1021, 120, 2)
Build and fit
# Building the RNN
# Initialising the RNN
regressor = Sequential()
# Input layer: one LSTM unit per lookback timestep, returning the full sequence
regressor.add(LSTM(units=n_past, return_sequences=True, activation=activation, input_shape=(X_train.shape[1], n_features)))
# Hidden layers: Dropout + LSTM pairs, all returning sequences
for _ in range(n_layers):
    regressor.add(Dropout(dropout))
    regressor.add(LSTM(units=n_neurons, return_sequences=True, activation=activation))
# Last hidden layer collapses the sequence (return_sequences=False)
regressor.add(LSTM(units=n_neurons, return_sequences=False, activation=activation))
regressor.add(Dropout(dropout))
# Output layer: one neuron per predicted future step (direct multi-step forecast)
regressor.add(Dense(units=n_future))
# Compiling the RNN.
# 'accuracy' is not a meaningful metric for regression; it is kept only so the
# history-plotting cell below keeps working, and 'mae' is added as an
# interpretable regression metric.
regressor.compile(optimizer=optimizer, loss='mse', metrics=['accuracy', 'mae'])
# Model summary
regressor.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm (LSTM) (None, 120, 120) 59040 _________________________________________________________________ dropout (Dropout) (None, 120, 120) 0 _________________________________________________________________ lstm_1 (LSTM) (None, 120, 15) 8160 _________________________________________________________________ dropout_1 (Dropout) (None, 120, 15) 0 _________________________________________________________________ lstm_2 (LSTM) (None, 120, 15) 1860 _________________________________________________________________ dropout_2 (Dropout) (None, 120, 15) 0 _________________________________________________________________ lstm_3 (LSTM) (None, 120, 15) 1860 _________________________________________________________________ dropout_3 (Dropout) (None, 120, 15) 0 _________________________________________________________________ lstm_4 (LSTM) (None, 120, 15) 1860 _________________________________________________________________ dropout_4 (Dropout) (None, 120, 15) 0 _________________________________________________________________ lstm_5 (LSTM) (None, 120, 15) 1860 _________________________________________________________________ dropout_5 (Dropout) (None, 120, 15) 0 _________________________________________________________________ lstm_6 (LSTM) (None, 120, 15) 1860 _________________________________________________________________ lstm_7 (LSTM) (None, 15) 1860 _________________________________________________________________ dropout_6 (Dropout) (None, 15) 0 _________________________________________________________________ dense (Dense) (None, 30) 480 ================================================================= Total params: 78,840 Trainable params: 78,840 Non-trainable params: 0 _________________________________________________________________
# Early stopping: stop when val_loss has not improved for `patience` epochs
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=patience)
# Fitting the RNN to the Training set (the last 10% of windows are held out
# for validation by validation_split)
res = regressor.fit(X_train, y_train
                    , batch_size=32
                    , epochs=750
                    , validation_split=0.1
                    , callbacks=[early_stop]
                    )
Epoch 1/750 29/29 [==============================] - 32s 294ms/step - loss: 0.0116 - accuracy: 0.0336 - val_loss: 0.5116 - val_accuracy: 0.0874 Epoch 2/750 29/29 [==============================] - 6s 224ms/step - loss: 0.0079 - accuracy: 0.0470 - val_loss: 0.4471 - val_accuracy: 0.1165 Epoch 3/750 29/29 [==============================] - 6s 219ms/step - loss: 0.0051 - accuracy: 0.0381 - val_loss: 0.3788 - val_accuracy: 0.1165 Epoch 4/750 29/29 [==============================] - 6s 217ms/step - loss: 0.0044 - accuracy: 0.0262 - val_loss: 0.2809 - val_accuracy: 0.1165 Epoch 5/750 29/29 [==============================] - 6s 218ms/step - loss: 0.0029 - accuracy: 0.0349 - val_loss: 0.1533 - val_accuracy: 0.1165 Epoch 6/750 29/29 [==============================] - 6s 217ms/step - loss: 0.0022 - accuracy: 0.0302 - val_loss: 0.0953 - val_accuracy: 0.1165 Epoch 7/750 29/29 [==============================] - 6s 220ms/step - loss: 0.0018 - accuracy: 0.0414 - val_loss: 0.0913 - val_accuracy: 0.1165 Epoch 8/750 29/29 [==============================] - 6s 224ms/step - loss: 0.0020 - accuracy: 0.0300 - val_loss: 0.0981 - val_accuracy: 0.1165 Epoch 9/750 29/29 [==============================] - 7s 231ms/step - loss: 0.0016 - accuracy: 0.0357 - val_loss: 0.0886 - val_accuracy: 0.1165 Epoch 10/750 29/29 [==============================] - 7s 230ms/step - loss: 0.0014 - accuracy: 0.0381 - val_loss: 0.0909 - val_accuracy: 0.1165 Epoch 11/750 29/29 [==============================] - 7s 232ms/step - loss: 0.0011 - accuracy: 0.0557 - val_loss: 0.0812 - val_accuracy: 0.1165 Epoch 12/750 29/29 [==============================] - 7s 231ms/step - loss: 0.0012 - accuracy: 0.0407 - val_loss: 0.0907 - val_accuracy: 0.1165 Epoch 13/750 29/29 [==============================] - 7s 232ms/step - loss: 0.0013 - accuracy: 0.0426 - val_loss: 0.0828 - val_accuracy: 0.1165 Epoch 14/750 29/29 [==============================] - 7s 253ms/step - loss: 0.0012 - accuracy: 0.0379 - val_loss: 0.0842 - 
val_accuracy: 0.0874 Epoch 15/750 29/29 [==============================] - 7s 229ms/step - loss: 0.0014 - accuracy: 0.0277 - val_loss: 0.0624 - val_accuracy: 0.0874 Epoch 16/750 29/29 [==============================] - 7s 241ms/step - loss: 0.0011 - accuracy: 0.0321 - val_loss: 0.0664 - val_accuracy: 0.1165 Epoch 17/750 29/29 [==============================] - 7s 236ms/step - loss: 9.8281e-04 - accuracy: 0.0461 - val_loss: 0.0472 - val_accuracy: 0.1165 Epoch 18/750 29/29 [==============================] - 7s 232ms/step - loss: 0.0010 - accuracy: 0.0423 - val_loss: 0.0786 - val_accuracy: 0.1165 Epoch 19/750 29/29 [==============================] - 7s 235ms/step - loss: 9.7445e-04 - accuracy: 0.0391 - val_loss: 0.0552 - val_accuracy: 0.0874 Epoch 20/750 29/29 [==============================] - 7s 235ms/step - loss: 0.0011 - accuracy: 0.0470 - val_loss: 0.0638 - val_accuracy: 0.0971 Epoch 21/750 29/29 [==============================] - 7s 230ms/step - loss: 9.2434e-04 - accuracy: 0.0289 - val_loss: 0.0454 - val_accuracy: 0.0291 Epoch 22/750 29/29 [==============================] - 7s 231ms/step - loss: 9.3791e-04 - accuracy: 0.0449 - val_loss: 0.0604 - val_accuracy: 0.0291 Epoch 23/750 29/29 [==============================] - 7s 229ms/step - loss: 0.0011 - accuracy: 0.0578 - val_loss: 0.0738 - val_accuracy: 0.1165 Epoch 24/750 29/29 [==============================] - 7s 229ms/step - loss: 8.8537e-04 - accuracy: 0.0458 - val_loss: 0.0662 - val_accuracy: 0.0291 Epoch 25/750 29/29 [==============================] - 7s 230ms/step - loss: 9.6977e-04 - accuracy: 0.0654 - val_loss: 0.0797 - val_accuracy: 0.0874 Epoch 26/750 29/29 [==============================] - 7s 228ms/step - loss: 8.2211e-04 - accuracy: 0.0296 - val_loss: 0.0582 - val_accuracy: 0.1165 Epoch 27/750 29/29 [==============================] - 7s 229ms/step - loss: 8.3923e-04 - accuracy: 0.0528 - val_loss: 0.0628 - val_accuracy: 0.0874 Epoch 28/750 29/29 [==============================] - 7s 229ms/step - 
loss: 8.4639e-04 - accuracy: 0.0449 - val_loss: 0.0671 - val_accuracy: 0.0874 Epoch 29/750 29/29 [==============================] - 7s 230ms/step - loss: 6.6406e-04 - accuracy: 0.0338 - val_loss: 0.0851 - val_accuracy: 0.0874 Epoch 30/750 29/29 [==============================] - 7s 230ms/step - loss: 7.3927e-04 - accuracy: 0.0543 - val_loss: 0.0672 - val_accuracy: 0.0874 Epoch 31/750 29/29 [==============================] - 7s 229ms/step - loss: 8.1294e-04 - accuracy: 0.0435 - val_loss: 0.0652 - val_accuracy: 0.0874 Epoch 32/750 29/29 [==============================] - 7s 227ms/step - loss: 7.0188e-04 - accuracy: 0.0283 - val_loss: 0.0713 - val_accuracy: 0.0874 Epoch 33/750 29/29 [==============================] - 7s 228ms/step - loss: 7.1076e-04 - accuracy: 0.0683 - val_loss: 0.0543 - val_accuracy: 0.0874 Epoch 34/750 29/29 [==============================] - 7s 238ms/step - loss: 7.6645e-04 - accuracy: 0.0407 - val_loss: 0.0415 - val_accuracy: 0.0874 Epoch 35/750 29/29 [==============================] - 7s 228ms/step - loss: 7.8250e-04 - accuracy: 0.0398 - val_loss: 0.0417 - val_accuracy: 0.0874 Epoch 36/750 29/29 [==============================] - 7s 228ms/step - loss: 7.4827e-04 - accuracy: 0.0278 - val_loss: 0.0698 - val_accuracy: 0.0874 Epoch 37/750 29/29 [==============================] - 7s 231ms/step - loss: 6.5613e-04 - accuracy: 0.0228 - val_loss: 0.0527 - val_accuracy: 0.0874 Epoch 38/750 29/29 [==============================] - 7s 228ms/step - loss: 5.8132e-04 - accuracy: 0.0449 - val_loss: 0.0640 - val_accuracy: 0.0874 Epoch 39/750 29/29 [==============================] - 7s 229ms/step - loss: 7.4772e-04 - accuracy: 0.0391 - val_loss: 0.0397 - val_accuracy: 0.0680 Epoch 40/750 29/29 [==============================] - 7s 232ms/step - loss: 7.7066e-04 - accuracy: 0.0566 - val_loss: 0.0658 - val_accuracy: 0.0874 Epoch 41/750 29/29 [==============================] - 7s 230ms/step - loss: 8.1629e-04 - accuracy: 0.0378 - val_loss: 0.0602 - val_accuracy: 
0.0971 Epoch 42/750 29/29 [==============================] - 7s 227ms/step - loss: 6.9382e-04 - accuracy: 0.0646 - val_loss: 0.0373 - val_accuracy: 0.0777 Epoch 43/750 29/29 [==============================] - 7s 227ms/step - loss: 0.0012 - accuracy: 0.0779 - val_loss: 0.1193 - val_accuracy: 0.0874 Epoch 44/750 29/29 [==============================] - 7s 227ms/step - loss: 7.3846e-04 - accuracy: 0.0403 - val_loss: 0.0760 - val_accuracy: 0.0874 Epoch 45/750 29/29 [==============================] - 7s 227ms/step - loss: 7.4030e-04 - accuracy: 0.0327 - val_loss: 0.0615 - val_accuracy: 0.0874 Epoch 46/750 29/29 [==============================] - 7s 228ms/step - loss: 6.2122e-04 - accuracy: 0.0429 - val_loss: 0.0552 - val_accuracy: 0.0874 Epoch 47/750 29/29 [==============================] - 7s 227ms/step - loss: 5.5215e-04 - accuracy: 0.0382 - val_loss: 0.0477 - val_accuracy: 0.0874 Epoch 48/750 29/29 [==============================] - 7s 228ms/step - loss: 5.5723e-04 - accuracy: 0.0498 - val_loss: 0.0499 - val_accuracy: 0.0874 Epoch 49/750 29/29 [==============================] - 7s 228ms/step - loss: 6.6943e-04 - accuracy: 0.0617 - val_loss: 0.0450 - val_accuracy: 0.0874 Epoch 50/750 29/29 [==============================] - 7s 227ms/step - loss: 6.9006e-04 - accuracy: 0.0413 - val_loss: 0.0633 - val_accuracy: 0.0874 Epoch 51/750 29/29 [==============================] - 7s 227ms/step - loss: 6.7869e-04 - accuracy: 0.0409 - val_loss: 0.0479 - val_accuracy: 0.0874 Epoch 52/750 29/29 [==============================] - 7s 238ms/step - loss: 5.3305e-04 - accuracy: 0.0407 - val_loss: 0.0742 - val_accuracy: 0.0874 Epoch 53/750 29/29 [==============================] - 7s 227ms/step - loss: 5.2520e-04 - accuracy: 0.0233 - val_loss: 0.0530 - val_accuracy: 0.0874 Epoch 54/750 29/29 [==============================] - 7s 226ms/step - loss: 5.3272e-04 - accuracy: 0.0616 - val_loss: 0.0723 - val_accuracy: 0.1165 Epoch 55/750 29/29 [==============================] - 7s 231ms/step - 
loss: 6.7017e-04 - accuracy: 0.0443 - val_loss: 0.0581 - val_accuracy: 0.0874 Epoch 56/750 29/29 [==============================] - 7s 228ms/step - loss: 5.1613e-04 - accuracy: 0.0556 - val_loss: 0.0553 - val_accuracy: 0.0874 Epoch 57/750 29/29 [==============================] - 7s 228ms/step - loss: 4.8905e-04 - accuracy: 0.0410 - val_loss: 0.0544 - val_accuracy: 0.0874 Epoch 58/750 29/29 [==============================] - 7s 227ms/step - loss: 4.5829e-04 - accuracy: 0.0589 - val_loss: 0.0631 - val_accuracy: 0.0874 Epoch 59/750 29/29 [==============================] - 7s 227ms/step - loss: 5.8111e-04 - accuracy: 0.0770 - val_loss: 0.0622 - val_accuracy: 0.0874 Epoch 60/750 29/29 [==============================] - 7s 226ms/step - loss: 5.3620e-04 - accuracy: 0.0437 - val_loss: 0.0563 - val_accuracy: 0.0874 Epoch 61/750 29/29 [==============================] - 7s 227ms/step - loss: 6.3099e-04 - accuracy: 0.0548 - val_loss: 0.0564 - val_accuracy: 0.0874 Epoch 62/750 29/29 [==============================] - 7s 227ms/step - loss: 4.5764e-04 - accuracy: 0.0424 - val_loss: 0.0456 - val_accuracy: 0.1165 Epoch 63/750 29/29 [==============================] - 7s 227ms/step - loss: 5.3795e-04 - accuracy: 0.0627 - val_loss: 0.0570 - val_accuracy: 0.0874 Epoch 64/750 29/29 [==============================] - 7s 226ms/step - loss: 6.2734e-04 - accuracy: 0.0525 - val_loss: 0.0559 - val_accuracy: 0.0874 Epoch 65/750 29/29 [==============================] - 7s 227ms/step - loss: 4.9405e-04 - accuracy: 0.0544 - val_loss: 0.0530 - val_accuracy: 0.1165 Epoch 66/750 29/29 [==============================] - 7s 227ms/step - loss: 5.3328e-04 - accuracy: 0.0565 - val_loss: 0.0512 - val_accuracy: 0.0874 Epoch 67/750 29/29 [==============================] - 7s 226ms/step - loss: 4.6163e-04 - accuracy: 0.0577 - val_loss: 0.0697 - val_accuracy: 0.1165 Epoch 00067: early stopping
# Exporting the trained regressor, encoding today's date and the
# hyperparameters in the filename for traceability
today = str(datetime.datetime.today().date())
params = ['reg', today, n_past_total, n_past, n_future, activation, n_layers, n_neurons, n_features, patience, optimizer]
for p in params:
    print(p)
# join by POSITION. The previous `if i != params[-1]` check compared by value,
# so any parameter equal to the last one ('adam') silently lost its separator.
filename = '_'.join(str(p) for p in params) + '.h5'
regressor.save('model/{}'.format(filename))
reg 2021-06-09 1200 120 30 softsign 6 15 2 25 adam INFO:tensorflow:Assets written to: model/filename\assets
# metric keys recorded by Keras during training (used by the plots below)
list(res.history)
['loss', 'accuracy', 'val_loss', 'val_accuracy']
# Plotting the training history: Loss and Accuracy, validation vs training
results = res
history = results.history
for metric in ('loss', 'accuracy'):
    plt.figure(figsize=(12, 4))
    plt.plot(history['val_' + metric])
    plt.plot(history[metric])
    plt.legend(['val_' + metric, metric])
    plt.title(metric.capitalize())
    plt.xlabel('Epochs')
    plt.ylabel(metric.capitalize())
    plt.show()
def dummy_invscaler(y):
'''
Since the scaler was trained into 2 features, it needs two features to perform the inverse scaleer.
For that purpose, this function will create a dummy array and concatenate it to the y_pred/y_true.
That dummy of ones will be drop after performing the inverse_transform.
INPUTS: array 'y', shape (X,)
'''
dummy = np.ones((len(y),1))
y = np.array(y).reshape(-1,1)
y = np.concatenate((y, dummy), axis=1)
y = sc.inverse_transform(y)
y = y[:,0]
return y
# Validation 1: predict the last n_future days of the TRAINING window and
# compare against the known targets (in-sample sanity check).
# The commented-out inverse-scaling code that used to live here is superseded
# by dummy_invscaler and has been removed.
y_pred = regressor.predict(X_train[-1].reshape(1, n_past, n_features)).tolist()[0]
y_pred = dummy_invscaler(y_pred)
# getting the true values
y_true = dummy_invscaler(y_train[-1])
print('y_pred:\n', y_pred.tolist())
print('y_true:\n', y_true.tolist())
# plotting the results
plt.figure(figsize=(16,5))
plt.plot(y_pred, label='Predicted')
plt.plot(y_true, label='True')
dates = df_train.index[-n_future:]
dates = [str(dates.date[i]) for i in range(len(dates))]
plt.xticks(range(n_future),dates)
plt.title('BTC price Predicted vs True')
plt.legend()
plt.show()
y_pred: [26704.465916178073, 27517.522390125443, 27271.691896336917, 28508.68967540497, 29189.196689199645, 29472.05711862372, 30384.713827849453, 31163.14353121343, 31527.217648353115, 32469.32121186147, 33067.59974851268, 34330.41269297287, 35542.76815177513, 35962.06836683993, 38595.208052488466, 38799.0352444012, 39105.31845069016, 41550.42573775083, 41363.664969100886, 42657.42270480447, 42913.07420494965, 42985.521189661085, 44873.144475070316, 45797.00517801456, 45250.480902183954, 45792.3712723086, 46289.636052832386, 46354.23054306845, 46774.45753927444, 46921.09730573071] y_true: [56048.9375, 58323.953125, 58245.00390599999, 59793.234375, 60204.96484400001, 59893.453125, 63503.45703099999, 63109.695313, 63314.01171900001, 61572.789063, 60683.82031299999, 56216.18359400001, 55724.265625, 56473.03125, 53906.08984400001, 51762.273438, 51093.652344, 50050.867188, 49004.253906, 54021.75390599999, 55033.117188, 54824.703125, 53555.109375, 57750.17578099999, 57828.05078099999, 56631.078125, 57200.29296900001, 53333.539063, 57424.007813, 56396.515625]
# Validation 2: predict the held-out TEST horizon from the last n_past
# observations before it (true out-of-sample check)
# use the configured `features` list instead of hardcoded column names so this
# cell stays consistent with the config cell
x = df[features][-n_past-n_future:-n_future].values
x = sc.transform(x)
y_pred = regressor.predict(x.reshape(1, n_past, n_features)).tolist()[0]
y_pred = dummy_invscaler(y_pred)
# getting the true values (first feature = Close)
y_true = df[features[0]][-n_future:].values
print('y_pred:\n', y_pred.tolist())
print('y_true:\n', y_true.tolist())
# plotting the results
plt.figure(figsize=(16,5))
plt.plot(y_pred, label='Predicted')
plt.plot(y_true, label='True')
dates = df.index[-n_future:]
dates = [str(dates.date[i]) for i in range(len(dates))]
plt.xticks(range(n_future),dates)
plt.title('BTC price Predicted vs True')
plt.legend()
plt.show()
y_pred: [26681.585557733037, 27494.210970684504, 27245.4007677618, 28477.462898426766, 29162.350569592305, 29444.927217194076, 30353.151182511938, 31131.438994964763, 31496.306982771905, 32438.604523728412, 33035.805407889864, 34290.834109199044, 35504.80245456098, 35922.6478387646, 38552.46835474468, 38753.11287963767, 39059.89899042185, 41500.02393080487, 41321.13361750512, 42608.65892964294, 42866.54117044192, 42936.297616103926, 44816.578495882946, 45737.96059810074, 45189.27024076573, 45740.22007615426, 46230.67768511775, 46298.33270842473, 46721.232282805315, 46867.17157514324] y_true: [57356.40234400001, 58803.77734400001, 58232.31640599999, 55859.796875, 56704.57421900001, 49150.535156, 49716.191406, 49880.535156, 46760.1875, 46456.058594, 43537.511719, 42909.402344, 37002.441406, 40782.738281, 37304.691406, 37536.632813, 34770.582031, 38705.980469, 38402.222656, 39294.199219, 38436.96875, 35697.605469, 34616.066406, 35678.128906, 37332.855469, 36684.925781, 37575.179688, 39208.765625, 36894.40625, 35746.101563]
# evaluation on the test horizon.
# rmse was previously computed but never shown; report it explicitly and keep
# MAE as the cell's displayed value.
rmse = math.sqrt(mean_squared_error(y_true, y_pred))
print('RMSE:', rmse)
mean_absolute_error(y_true, y_pred)
12059.573530830456
# Explained variance score: best possible value is 1.0, lower values are worse;
# a negative value indicates a very poor fit on this horizon.
explained_variance_score(y_true, y_pred)
-2.350093924286009
# Forecasting: predict n_future days BEYOND the end of the dataset using the
# most recent n_past observations
x = df[features][-n_past:].values
x = sc.transform(x)
y_pred = regressor.predict(x.reshape(1, n_past, n_features)).tolist()[0]
y_pred = dummy_invscaler(y_pred)
# DataFrame of the predicted prices, indexed by the next calendar days
y_pred_df = pd.DataFrame(y_pred,
                         index=pd.date_range(start=df[['Close']].index[-1]+datetime.timedelta(days=1),
                                             periods=len(y_pred),
                                             freq="D"),
                         columns=df[['Close']].columns)
# last n_future observed prices
y_true_df = df[['Close']].tail(n_future)
# DataFrame.append was removed in pandas 2.0; use pd.concat to attach the first
# predicted point so the two plotted lines connect.
y_true_df = pd.concat([y_true_df, y_pred_df.head(1)])
# (the old print of y_true was stale data from the previous validation cell)
print('y_pred:\n', y_pred.tolist())
# plotting the results; the datetime index labels the x-axis automatically
plt.figure(figsize=(12,5))
plt.plot(y_pred_df, label='Predicted')
plt.plot(y_true_df, label='True')
plt.title('BTC price Predicted vs True')
plt.legend()
plt.show()
y_pred: [26655.66981144185, 27467.64683681198, 27219.366479696742, 28445.894864826798, 29131.46864141082, 29414.95051710399, 30320.0439019391, 31098.067689531938, 31462.750680328336, 32403.598778686122, 33000.63801497411, 34250.2676773096, 35462.953616208724, 35881.23006140825, 38502.99692111488, 38704.60774107368, 39012.27752689946, 41443.78124736443, 41270.124732989934, 42553.65913874068, 42814.181628139566, 42882.89993523644, 44755.601325837495, 45673.24038174087, 45128.066763697425, 45683.320024695066, 46171.10864765892, 46240.75014372202, 46663.27972408112, 46808.38563182697] y_true: [57356.40234400001, 58803.77734400001, 58232.31640599999, 55859.796875, 56704.57421900001, 49150.535156, 49716.191406, 49880.535156, 46760.1875, 46456.058594, 43537.511719, 42909.402344, 37002.441406, 40782.738281, 37304.691406, 37536.632813, 34770.582031, 38705.980469, 38402.222656, 39294.199219, 38436.96875, 35697.605469, 34616.066406, 35678.128906, 37332.855469, 36684.925781, 37575.179688, 39208.765625, 36894.40625, 35746.101563]